In [1]:
import math
import numpy as np
import pandas as pd
import BQhelper as bq
%matplotlib nbagg
import matplotlib.pyplot as plt
# Set the `project` attribute on the BQhelper module.
# NOTE(review): presumably the GCP project the queries run under - confirm in BQhelper.
bq.project = "mlab-sandbox"
# bq.dataset = 'mattmathis'
# bq.UnitTestRunQuery()
# bq.UnitTestWriteQuery()
# When True, the `if UnitTest:` sections further down run the plotting self-test
# and then stop the notebook before the main analysis.
UnitTest=False
In [2]:
# Plot simple timeseries for a list of beacons
# UnitTest=True
# SQL template: one row per test with its time, client IP, mean throughput in
# Mbps and the `_instruments` value, restricted to tests after 2019-03-01 and
# ordered by TestTime.
# {selector} completes the table name and {clientIP} is pasted inside IN ( ... ),
# so it must already be a quoted, comma-separated list.  Both are passed as
# keyword arguments to bq.QueryTimestampTimeseries by plotMultiBeacons
# (presumably substituted there - confirm in BQhelper).
query="""
SELECT
a.TestTime,
client.IP,
a.MeanThroughputMbps,
node._instruments
# FROM `mlab-sandbox.mm_unified_testing.unified_{selector}`
FROM `measurement-lab.ndt.unified_{selector}`
WHERE client.IP in ( {clientIP} )
AND test_date > '2019-03-01'
ORDER BY TestTime
"""
# NOTE: a `global` statement at module level has no effect; StashData only
# exists once plotMultiBeacons has assigned it.
global StashData
def plotMultiBeacons(clients, columns=1, width=10, data=None, selector='downloads'):
    """Draw one throughput-versus-time scatter subplot per beacon (client IP).

    Args:
        clients: iterable of client IP strings (e.g. a list or a pandas
            Series); one subplot is drawn per entry, in order.
        columns: number of subplot columns in the grid.
        width: total figure width in inches.  The height is derived from it
            on the assumption that every subplot is square.
        data: optional pre-fetched frame with 'IP', 'MeanThroughputMbps' and
            '_instruments' columns (assumed to be indexed by test time -
            TODO confirm against bq.QueryTimestampTimeseries).  When None,
            the module-level `query` is run and the result is also kept in
            the global `StashData` so it can be reused while debugging.
        selector: forwarded to the query to choose the unified_* table;
            only used when `data` is None.

    Raises:
        ValueError: if `clients` is empty.  This is checked first so that no
            slow query is issued for a plot that cannot be drawn.
    """
    global StashData
    clients = list(clients)  # accept any iterable, including generators
    if not clients:
        raise ValueError('plotMultiBeacons needs at least one client IP')
    if data is None:
        clist = '"' + '", "'.join(clients) + '"'
        data = bq.QueryTimestampTimeseries(query, clientIP=clist, selector=selector)
        StashData = data  # Skip slow queries when debugging
    # int() keeps nrows integral even where math.ceil returns a float.
    rows = int(math.ceil(len(clients) / float(columns)))
    figLen = width / float(columns) * rows  # assume square subplots
    print('Size', figLen, width)
    # Size this figure only; do not overwrite the global rcParams default.
    fig, axs = plt.subplots(nrows=rows, ncols=columns, squeeze=False,
                            sharex='all', figsize=(width, figLen))
    # axs is a 2-D array; .flat walks it row by row. Surplus axes stay empty.
    for ax, client in zip(axs.flat, clients):
        print('Beacon: ' + client)
        ax.set_title('Beacon: ' + client)
        cdata = data[data['IP'] == client]
        tput = cdata['MeanThroughputMbps']
        instrument = cdata['_instruments']
        # Blue dots: web100 measurements; red dots: tcpinfo measurements.
        ax.plot(tput[instrument == 'web100'], 'b.', label='web100')
        ax.plot(tput[instrument == 'tcpinfo'], 'r.', label='tcpinfo')
        ax.set_ylabel('Mean throughput (Mbps)')
        ax.legend(loc='best', fontsize='small')
    fig.autofmt_xdate()
    fig.show()
if UnitTest:
    # Self-test for plotMultiBeacons.  The test data comes from, in order of
    # preference: data stashed by an earlier query (StashData), a TestData
    # frame left over from a previous run of this cell, or a fresh query.
    # %matplotlib nbagg
    try:
        TestData = StashData
        print('Using StashData')
    except NameError:
        # Nothing has been stashed yet; fall through to the next probe.
        pass
    try:
        TestData
    except NameError:
        # No TestData from any source: fetch a small, fixed set of beacons.
        print('Genereating test data')
        clients = [
            '69.68.23.44',  # Max deltaMean
            '96.229.66.58',  # Max deltaMax
            '73.210.92.196',
        ]
        clist = '"' + '", "'.join(clients) + '"'
        TestData = bq.QueryTimestampTimeseries(query, clientIP=clist, selector='downloads')
    # plt.ion()
    # Plot every client actually present in the data, two subplots per row.
    clients = list(TestData['IP'].unique())
    print(clients)
    plotMultiBeacons(clients, data=TestData, columns=2, width=10)
print ('Done')
In [3]:
if UnitTest:  # Don't go on to the main event
    import time
    time.sleep(5)  # Make sure the asynchronous plotting finishes in the background
    # Abort on purpose so that "Run All" halts here and the slow main analysis
    # below is skipped.  An explicit exception states the intent.
    raise RuntimeError('UnitTest mode: stopping before the main analysis')
In [4]:
# SQL template that ranks client IPs by how steady their weekly throughput is.
#   rawStats:   per (IP, week) standard deviation, mean and test count of
#               MeanThroughputMbps, keeping weeks with more than 12 tests and
#               a positive mean.
#   selectBest: per IP, the number of such weeks, the approximate 90th
#               percentile of dev / meanTput, and the average weekly mean;
#               only IPs with more than 50 qualifying weeks are kept.
# The final SELECT returns the 10 IPs with the smallest dev90.  The ORDER BY
# sits on the outer query because row order coming out of a CTE is not
# guaranteed, so LIMIT 10 could otherwise pick arbitrary rows.
# {selector} completes the table name and is supplied by the caller.
# NOTE(review): the in-query comment says "~40" weeks but HAVING requires
#   weeks > 50 - confirm which is intended.
# NOTE(review): week is EXTRACT(WEEK FROM test_date), presumably a week-of-year
#   number, so the same week of different years would be merged - confirm.
listQ='''
# Find clients with ~40 low varience weeks since 2019-01-01
# High varience weeks (e.g. step discontinuities) are ignored
WITH
rawStats AS (
SELECT
client.IP,
EXTRACT(WEEK FROM test_date) AS week,
STDDEV( a.MeanThroughputMbps ) AS dev,
AVG( a.MeanThroughputMbps ) AS meanTput,
count (*) AS tests
FROM
`measurement-lab.ndt.unified_{selector}`
WHERE
test_date > '2019-01-01'
GROUP BY
IP, week
HAVING
tests > 12 AND meanTput > 0
),
selectBest AS (
SELECT
IP,
count (*) AS weeks,
APPROX_QUANTILES(dev / meanTput, 100)[OFFSET(90)] as dev90,
AVG(meanTput) AS meanTput
FROM
rawStats
GROUP BY
IP
HAVING weeks > 50
)
SELECT * FROM selectBest ORDER BY dev90 LIMIT 10
'''
In [5]:
%matplotlib nbagg
# Direction to analyse.  Set to "downloads" to study the other direction; the
# former back-to-back assignment of both values left the first one dead.
selector = "uploads"
# Ask BigQuery for the clients selected by listQ for that direction, then plot
# each of them two subplots per row.
stepbeacons = bq.QueryDataFrame(listQ, selector=selector)
plotMultiBeacons(stepbeacons['IP'], columns=2, width=10, selector=selector)
In [6]:
# After one pass through the notebook, switch the flag on so that re-running
# the `if UnitTest:` cells above takes the self-test path (re-running the first
# cell resets it to False).
UnitTest=True # Rerunning is always testing
In [8]:
# Plot one hand-picked beacon on its own in a single 6-inch-wide panel; the
# data is queried with the default 'downloads' selector.
clients = [
    '71.119.21.97',
]
plotMultiBeacons(clients, width=6, columns=1)